package com.pap.base.util.regex;

import java.util.BitSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

/**
 * Regular-expression and character-classification helpers.
 *
 * <p>All methods are static and keep no mutable state; the only shared data
 * is a lookup table that is populated once during class initialization and
 * never modified afterwards.
 *
 * @author alexgaoyh
 *
 */
public class RegexUtilss {

	/**
	 * Characters that {@code java.net.URLEncoder.encode} leaves untouched:
	 * {@code a-z}, {@code A-Z}, {@code 0-9}, {@code '-'}, {@code '_'},
	 * {@code '.'} and {@code '*'}, plus {@code '+'} (the encoded form of a
	 * space). Populated once and only read afterwards.
	 */
	private static final BitSet DONT_NEED_ENCODING = buildDontNeedEncoding();

	/**
	 * Builds the table of characters that may appear verbatim in a string
	 * produced by {@code URLEncoder.encode}.
	 */
	private static BitSet buildDontNeedEncoding() {
		BitSet bits = new BitSet(256);
		for (char c = 'a'; c <= 'z'; c++) {
			bits.set(c);
		}
		for (char c = 'A'; c <= 'Z'; c++) {
			bits.set(c);
		}
		for (char c = '0'; c <= '9'; c++) {
			bits.set(c);
		}
		bits.set('+');
		bits.set('-');
		bits.set('_');
		bits.set('.');
		bits.set('*');
		return bits;
	}

	/**
	 * Tests whether a string matches a regular expression in its entirety.
	 *
	 * <p>The content is trimmed before matching, and the whole trimmed string
	 * must match (this is {@code Matcher.matches()}, not a substring search).
	 *
	 * @param content
	 *            the string to test
	 * @param regex
	 *            the regular expression
	 * @param caseSentivite
	 *            {@code true} to match case-sensitively, {@code false} to
	 *            ignore case
	 * @return {@code true} if the trimmed content matches; {@code false} if it
	 *         does not, or if either argument is null, empty or
	 *         whitespace-only
	 * @throws java.util.regex.PatternSyntaxException
	 *             if {@code regex} is not a valid pattern
	 */
	public static boolean isMatchString(String content, String regex, boolean caseSentivite) {
		if (isBlank(content) || isBlank(regex)) {
			return false;
		}
		// Flag value 0 is what the single-argument Pattern.compile uses.
		final int flags = caseSentivite ? 0 : Pattern.CASE_INSENSITIVE;
		final Matcher matcher = Pattern.compile(regex, flags).matcher(content.trim());
		return matcher.matches();
	}

	/**
	 * Tests whether a string contains at least one Chinese (CJK) character.
	 *
	 * <p>The string is scanned by Unicode code point rather than by UTF-16
	 * {@code char}, so ideographs outside the Basic Multilingual Plane (CJK
	 * Extension B, stored as surrogate pairs) are recognized too.
	 *
	 * <p>Note that the accepted blocks also include CJK symbols and
	 * punctuation, half-/full-width forms and general punctuation, so a string
	 * containing only such punctuation also yields {@code true}.
	 *
	 * @param str
	 *            the string to inspect
	 * @return {@code true} if any code point belongs to one of the accepted
	 *         blocks; {@code false} otherwise, or if {@code str} is null,
	 *         empty or whitespace-only
	 */
	public static boolean isChinese(String str) {
		if (isBlank(str)) {
			return false;
		}
		int index = 0;
		while (index < str.length()) {
			final int codePoint = str.codePointAt(index);
			if (isChineseCodePoint(codePoint)) {
				return true;
			}
			// Advance by 2 chars for a surrogate pair, 1 otherwise.
			index += Character.charCount(codePoint);
		}
		return false;
	}

	/**
	 * Tests whether a single code point lies in one of the Unicode blocks this
	 * class treats as Chinese.
	 */
	private static boolean isChineseCodePoint(int codePoint) {
		// of(int) returns null for code points in no block; the == comparisons
		// below are then simply false.
		final Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
		return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
				|| block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
				|| block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
				|| block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
				|| block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
				|| block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
				|| block == Character.UnicodeBlock.GENERAL_PUNCTUATION;
	}

	/**
	 * Tests whether a string already looks like the output of
	 * {@code URLEncoder.encode}, i.e. whether encoding it can be skipped.
	 *
	 * <p>Useful when handed a URL without knowing whether it has been encoded:
	 * skipping the encoding risks errors, encoding twice corrupts the value.
	 *
	 * <p>A string qualifies when every character is either one that
	 * {@code URLEncoder.encode} leaves as-is ({@code 0-9a-zA-Z}, {@code '-'},
	 * {@code '_'}, {@code '.'}, {@code '*'}, or {@code '+'} standing for a
	 * space) or part of a {@code %XX} escape whose two digits are uppercase
	 * hexadecimal ({@code [0-9A-F]}). Consequently a string that needs no
	 * encoding at all, including the empty string, also yields {@code true}.
	 *
	 * @param str
	 *            the string to inspect
	 * @return {@code true} if the string contains only characters valid in
	 *         encoded output; {@code false} otherwise or if {@code str} is
	 *         {@code null}
	 */
	public static boolean isUrlEncoded(String str) {
		if (str == null) {
			return false;
		}
		final int length = str.length();
		for (int i = 0; i < length; i++) {
			final char c = str.charAt(i);
			if (DONT_NEED_ENCODING.get(c)) {
				continue;
			}
			// Anything else must be a complete %XX escape.
			final boolean validEscape = c == '%'
					&& i + 2 < length
					&& isDigit16Char(str.charAt(i + 1))
					&& isDigit16Char(str.charAt(i + 2));
			if (!validEscape) {
				return false;
			}
			// Skip the two hex digits that were just validated.
			i += 2;
		}
		return true;
	}

	/**
	 * Tests whether {@code c} is an uppercase hexadecimal digit
	 * ({@code 0-9} or {@code A-F}).
	 */
	private static boolean isDigit16Char(char c) {
		return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');
	}

	/**
	 * Tests whether a string is null, empty, or consists solely of characters
	 * for which {@link Character#isWhitespace(char)} is true.
	 */
	private static boolean isBlank(String s) {
		if (s == null) {
			return true;
		}
		for (int i = 0; i < s.length(); i++) {
			if (!Character.isWhitespace(s.charAt(i))) {
				return false;
			}
		}
		return true;
	}

}
